########################################################################## # Carl Schmertmann # 15 Nov 2013 # # For a selected forecast.year (usually 2010, but sometimes 1985), this program # calculates the forecast/posterior distribution for a surface over # 30 ages, 15..44 # 40 cohorts, forecast.year - (54:15) [ usually 1956..1995] # 10 initial cohorts forecast.year - (54:15) [ usually 1956..1965] with complete # (or very nearly complete*) data # # using any data that would have been available in that forecast.year # # * if only a few rates are missing for the earliest cohorts at the # youngest ages (e.g. Australia in a 2010 forecast), then we do a "freeze-rate" backcast as in # Myrskylä et al 2013 to fill in and make the forecast possible ########################################################################## rm(list=ls()) graphics.off() library(MASS) library(Matrix) forecast.year = 2010 infile = "calibrated penalty matrix.dput" Kjfile = "unweighted penalty matrices.RData" forecastFile = paste("Forecast ",format(Sys.time(), "%a %d %b %Y %H%M"),".dput", sep="") ## read data source("input rate and exposure data.R") ## set up a list that will contain forecast information forecast = vector("list", length(rates)) names(forecast) = names(rates) ## retrieve the calibrated penalty matrix K = dget(infile) ## retrieve the unweighted penalty matrices K1...K90 load(Kjfile) ## loop over countries in the dataset for (k in names(rates)) { print(paste("... starting ", full.name[k])) last.coh = forecast.year - 15 coh = last.coh - 39:0 surf = matrix(NA, 30, 40, dimnames=list(age, coh)) women = matrix(NA, 30, 40, dimnames=list(age, coh)) Data = rates[[k]] Expo = expos[[k]] ix = match( dimnames(surf)[[2]], dimnames(Data)[[2]]) oksurf = which(!is.na(ix)) okdata = ix[!is.na(ix)] surf[, oksurf] = Data[, okdata] women[, oksurf] = Expo[, okdata] ## backproject rates under 20, if any are missing for the first 5 cohorts ## this will allow projections for a few cases (eg Australia) without any meaningful ## change in the forecast gap = any( is.na(surf[paste(15:19), 1:5])) if (gap) { for (x in 15:19) { bad = is.na(surf[paste(x), 1:5]) if (any(bad)) { igap = which(bad) # which cohort cols 1-5 have missing data? inext = 1+max(igap) surf[paste(x), igap] = surf[paste(x), inext] women[paste(x), igap] = women[paste(x), inext] } # if any bad } # for x } # if gap ## vis is a 30 x 40 logical matrix: =TRUE if a cell is visible in this forecast year has.data = !is.na(surf) masked = outer(age,coh,"+") > forecast.year vis = has.data & !masked nvis = sum(vis) if (nvis > 0) { try( { complete.at.forecast = apply( matrix(vis,nrow=A), 2,all) # at forecast date, which coh had complete histories V = diag(length(surf))[vis,] PSIINV = diag( women[vis] / pmax(surf[vis],1e-6)) # inverse variances of observed estimates on diagonal of large matrix ## calculate posterior variance matrix VPOST and mean vector MPOST VPOST = solve( t(V) %*% PSIINV %*% V + K) MPOST = VPOST %*% t(V) %*% PSIINV %*% surf[vis] ## convert MPOST to an A x C surface mu = matrix(MPOST, nrow=A, dimnames=list(age, coh)) ss = matrix(sqrt(diag(VPOST)), nrow=A, dimnames=list(age, coh)) ## save results for post-processing if desired forecast[[k]]$forecast.year = forecast.year forecast[[k]]$last.coh = last.coh forecast[[k]]$vis = vis forecast[[k]]$surf = surf forecast[[k]]$mu = round(mu,5) forecast[[k]]$sd = round(ss,6) ##### CFRs #### C = diag(40) %x% matrix(1,1,30) mu.cfr = as.vector( C %*% MPOST) sd.cfr = sqrt(diag( C %*% VPOST %*% t(C) )) names(mu.cfr) = coh names(sd.cfr) = coh forecast[[k]]$mu.cfr = round( mu.cfr, 4) forecast[[k]]$sd.cfr = round( sd.cfr, 6) ## penalties pi[1]...pi[J] pen = vector("numeric", length(Kj)) names(pen) = names(Kj) for (j in names(pen)) { pen[j] = as.numeric( t(MPOST) %*% Kj[[j]] %*% MPOST ) } ## save the penalties pi[1]...pi[J] for this forecast forecast[[k]]$pi = round(pen,4) ##### if this is a 2010 forecast, also calculate and save the posterior means and SDs of ###### some selected cross-cohort differences in CFR if (forecast.year == 2010) { ## D %*% theta gives CFR1970-CFR1960, CFR1980-CFR1970, CFR1990-CFR1980) D = rbind( (matrix( 1*(coh==1970), nrow=1) - matrix( 1*(coh==1960), nrow=1)) %x% matrix(1,1,30), (matrix( 1*(coh==1980), nrow=1) - matrix( 1*(coh==1970), nrow=1)) %x% matrix(1,1,30), (matrix( 1*(coh==1990), nrow=1) - matrix( 1*(coh==1980), nrow=1)) %x% matrix(1,1,30), (matrix( 1*(coh==1975), nrow=1) - matrix( 1*(coh==1965), nrow=1)) %x% matrix(1,1,30), (matrix( 1*(coh==1985), nrow=1) - matrix( 1*(coh==1975), nrow=1)) %x% matrix(1,1,30), (matrix( 1*(coh==1995), nrow=1) - matrix( 1*(coh==1985), nrow=1)) %x% matrix(1,1,30) ) rownames(D) = c("CFR70-CFR60","CFR80-CFR70","CFR90-CFR80", "CFR75-CFR65","CFR85-CFR75","CFR95-CFR85") mu.change = as.vector(D %*% MPOST) names(mu.change) = rownames(D) sd.change = sqrt( diag( as.matrix(D %*% VPOST %*% t(D)) ) ) names(sd.change) = rownames(D) forecast[[k]]$mu.change = round(mu.change, 3) forecast[[k]]$sd.change = round( sd.change, 4) } # if forecast.year = 2010 }) # try } # if nvis > 0 } # for k ## write the entire forecast to a file for later processing dput(forecast, file=forecastFile) print(forecastFile)